from typing import Dict

from centralized_verification.agents.decentralized_training.independent_agents.q_learner import QLearner
from centralized_verification.agents.decentralized_training.multi_agent_wrapper import MultiAgentLearnerWrapper
from centralized_verification.configuration import Configuration, TrainingLimits
from centralized_verification.envs.fast_grid_world_partial_obs import FastGridWorldPartialObs
from centralized_verification.envs.utils import map_parser
from centralized_verification.shields.no_shield import NoShield


def make_basic_pentagon_config_iql(run_name: str, q_learner_params: Dict = None, randomize_starts: bool = True):
    """Assemble an unshielded, per-agent Q-learning configuration on the Pentagon map.

    The map file ``maps/Pentagon.txt`` is parsed into an environment spec, a
    ``FastGridWorldPartialObs`` environment is built from it, and one
    ``QLearner`` is created for every (observation space, action space) pair
    the environment reports. The learners are bundled into a
    ``MultiAgentLearnerWrapper`` and paired with a ``NoShield``.

    Args:
        run_name: Passed to the resulting ``Configuration`` as its ``run_name``.
        q_learner_params: Extra keyword arguments forwarded to every
            ``QLearner`` constructor. ``None`` or an empty dict means no
            extra arguments.
        randomize_starts: Forwarded to ``FastGridWorldPartialObs``.

    Returns:
        A ``Configuration`` with episodes limited to 500 steps, training
        limited to one million total steps, and 200 log entries.
    """
    # Any falsy value (None, empty dict) means "no extra learner arguments".
    learner_kwargs = q_learner_params or {}

    parsed_map = map_parser("maps/Pentagon.txt")
    grid_world = FastGridWorldPartialObs(*parsed_map, randomize_starts=randomize_starts)

    # One independent learner per agent, pairing each agent's observation
    # space with its action space.
    obs_spaces = grid_world.agent_obs_spaces()
    act_spaces = grid_world.agent_actions_spaces()
    learners = [
        QLearner(obs, act, **learner_kwargs)
        for obs, act in zip(obs_spaces, act_spaces)
    ]

    wrapped_learner = MultiAgentLearnerWrapper(learners)
    shield = NoShield(grid_world)
    limits = TrainingLimits(max_episode_len=500, max_total_steps=int(1e6))

    return Configuration(
        shield=shield,
        env=grid_world,
        learner=wrapped_learner,
        run_name=run_name,
        limits=limits,
        num_log_entries=200,
    )
